package com.metis.document.parse.dialog.utils.chaos;

import org.apache.commons.lang3.StringUtils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utilities for deciding whether a piece of text contains garbled
 * ("chaos" / mojibake) characters.
 *
 * @author levi
 */
public final class ChaosUtils {

    /**
     * Characters that are stripped before inspection: whitespace, every
     * Unicode punctuation character ({@code \p{P}}), and the ASCII symbols
     * {@code ` ~ $ ^ + = < > |}. Compiled once because the pattern is constant.
     */
    private static final Pattern IGNORED_CHARS = Pattern.compile("[\\s\\p{P}`~\\$\\^+=<>|]+");

    private ChaosUtils() {
    }

    /**
     * Tells whether a UTF-16 code unit belongs to one of the Unicode blocks this
     * class treats as Chinese text.
     *
     * <p>Besides the CJK ideograph blocks (unified, compatibility, extension A),
     * the accepted blocks also include General Punctuation, CJK Symbols and
     * Punctuation, and Halfwidth and Fullwidth Forms, so the result is also
     * {@code true} for characters of those blocks that are not ideographs.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} lies in one of the accepted blocks
     */
    public static boolean isChinese(final char c) {
        return isChineseCodePoint(c);
    }

    /**
     * Code-point variant of {@link #isChinese(char)} using the same block list.
     *
     * @param codePoint a valid Unicode code point
     * @return {@code true} if the code point lies in one of the accepted blocks
     */
    private static boolean isChineseCodePoint(final int codePoint) {
        final Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Checks whether the text contains a garbled character, with an empty
     * context string for the diagnostic message.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if a garbled character was found
     * @see #isContainChaosCode(String, String)
     */
    public static boolean isContainChaosCode(final String strName) {
        return isContainChaosCode(strName, "");
    }

    /**
     * Checks whether the text contains a garbled character.
     *
     * <p>Whitespace, punctuation and a few ASCII symbols are removed first. Every
     * remaining character must then be a letter or digit, or be accepted by
     * {@link #isChinese(char)}; the first character that is neither is reported
     * on standard output and makes the method return {@code true}.
     *
     * <p>The text is examined by Unicode code point rather than by UTF-16 code
     * unit, so letters and digits outside the Basic Multilingual Plane (which
     * are stored as surrogate pairs) are classified correctly instead of being
     * mistaken for garbage.
     *
     * @param strName the text to inspect; {@code null} or blank yields {@code false}
     * @param log     context appended to the diagnostic message; not otherwise used
     * @return {@code true} if a garbled character was found
     */
    public static boolean isContainChaosCode(final String strName, final String log) {
        if (StringUtils.isBlank(strName)) {
            return false;
        }
        final Matcher ignored = IGNORED_CHARS.matcher(strName);
        // trim() additionally drops leading/trailing characters <= U+0020 that
        // the pattern does not match (e.g. other control characters).
        final String text = ignored.replaceAll("").trim();

        int index = 0;
        while (index < text.length()) {
            final int codePoint = text.codePointAt(index);
            // Advance by one or two chars depending on whether this is a surrogate pair.
            index += Character.charCount(codePoint);
            if (!Character.isLetterOrDigit(codePoint) && !isChineseCodePoint(codePoint)) {
                final String offending = new String(Character.toChars(codePoint));
                System.out.println("isMessyCode: " + offending + " -> " + strName + " -> " + log);
                return true;
            }
        }
        return false;
    }

}

